{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 分组过滤，运算，排序"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 分组过滤"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=pd.DataFrame({\"a\":[\"i\",\"j\",\"k\",\"i\",\"j\",\"k\"],\"b\":[2,1,4,1,2,4],\"c\":[3,2,2,1,2,3]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>i</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>j</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>k</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>i</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>j</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>k</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b  c\n",
       "0  i  2  3\n",
       "1  j  1  2\n",
       "2  k  4  2\n",
       "3  i  1  1\n",
       "4  j  2  2\n",
       "5  k  4  3"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   a  b  c\n",
      "0  i  2  3\n",
      "3  i  1  1\n",
      "\n",
      "   a  b  c\n",
      "1  j  1  2\n",
      "4  j  2  2\n",
      "\n",
      "   a  b  c\n",
      "2  k  4  2\n",
      "5  k  4  3\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in a.groupby(\"a\"):\n",
    "    print(i[1])\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#这里的x为已经groupby之后的数据\n",
    "def gl(x):\n",
    "    return x[\"b\"].sum()>3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>k</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>k</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b  c\n",
       "2  k  4  2\n",
       "5  k  4  3"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.groupby(\"a\").filter(gl)\n",
    "#最终结果是满足条件的原始记录（注意结果的形式）\n",
    "#请特别注意各个函数方法的左右效果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a\n",
       "k    8\n",
       "Name: b, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p=a.groupby(\"a\").b.sum()\n",
    "p[p>3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 课间题"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 请过滤出所有c列中值为偶数的记录(这里不用filter，直接用以前的方法过滤就行)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>j</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>k</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>j</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b  c\n",
       "1  j  1  2\n",
       "2  k  4  2\n",
       "4  j  2  2"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 请过滤出c分组内记录行数大于等于2的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>i</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>j</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>k</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>i</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>j</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>k</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b  c\n",
       "0  i  2  3\n",
       "1  j  1  2\n",
       "2  k  4  2\n",
       "3  i  1  1\n",
       "4  j  2  2\n",
       "5  k  4  3"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.count().b"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 排序\n",
    "\n",
    "* groupby通常会自动对行标排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "i    2\n",
       "u    3\n",
       "2    6\n",
       "1    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a1=pd.Series([2,3,6,1],index=[\"i\",\"u\",\"2\",\"1\"])#排序需要在同种类型的数据中进行（比如整数与字符就不能比较）\n",
    "a1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2    6\n",
       "u    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a1.sort_values(ascending=False)[0:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "u    3\n",
       "i    2\n",
       "2    6\n",
       "1    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a1.sort_index(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>a</th>\n",
       "      <th>k</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.571647</td>\n",
       "      <td>-1.125259</td>\n",
       "      <td>0.669289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.289666</td>\n",
       "      <td>-0.642328</td>\n",
       "      <td>1.604716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.517962</td>\n",
       "      <td>1.002307</td>\n",
       "      <td>0.217916</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.456894</td>\n",
       "      <td>-0.360221</td>\n",
       "      <td>-0.414614</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          b         a         k\n",
       "2 -0.571647 -1.125259  0.669289\n",
       "4  0.289666 -0.642328  1.604716\n",
       "1  0.517962  1.002307  0.217916\n",
       "3  0.456894 -0.360221 -0.414614"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b1=pd.DataFrame(np.random.randn(4,3),index=[2,4,1,3], columns=[\"b\",\"a\",\"k\"])\n",
    "b1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>a</th>\n",
       "      <th>k</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.517962</td>\n",
       "      <td>1.002307</td>\n",
       "      <td>0.217916</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.571647</td>\n",
       "      <td>-1.125259</td>\n",
       "      <td>0.669289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.456894</td>\n",
       "      <td>-0.360221</td>\n",
       "      <td>-0.414614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.289666</td>\n",
       "      <td>-0.642328</td>\n",
       "      <td>1.604716</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          b         a         k\n",
       "1  0.517962  1.002307  0.217916\n",
       "2 -0.571647 -1.125259  0.669289\n",
       "3  0.456894 -0.360221 -0.414614\n",
       "4  0.289666 -0.642328  1.604716"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b1.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>k</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.749984</td>\n",
       "      <td>0.954464</td>\n",
       "      <td>0.668990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.244264</td>\n",
       "      <td>-0.811406</td>\n",
       "      <td>0.234548</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.502138</td>\n",
       "      <td>-1.074694</td>\n",
       "      <td>-0.846381</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1.616720</td>\n",
       "      <td>1.394678</td>\n",
       "      <td>-0.313095</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b         k\n",
       "2  0.749984  0.954464  0.668990\n",
       "4  0.244264 -0.811406  0.234548\n",
       "1  0.502138 -1.074694 -0.846381\n",
       "3 -1.616720  1.394678 -0.313095"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b1.sort_index(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>a</th>\n",
       "      <th>k</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.571647</td>\n",
       "      <td>-1.125259</td>\n",
       "      <td>0.669289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.289666</td>\n",
       "      <td>-0.642328</td>\n",
       "      <td>1.604716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.456894</td>\n",
       "      <td>-0.360221</td>\n",
       "      <td>-0.414614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.517962</td>\n",
       "      <td>1.002307</td>\n",
       "      <td>0.217916</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          b         a         k\n",
       "2 -0.571647 -1.125259  0.669289\n",
       "4  0.289666 -0.642328  1.604716\n",
       "3  0.456894 -0.360221 -0.414614\n",
       "1  0.517962  1.002307  0.217916"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b1.sort_values(by=\"b\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 课间题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>brand</th>\n",
       "      <th>behavr</th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10944750</td>\n",
       "      <td>13451</td>\n",
       "      <td>0</td>\n",
       "      <td>06/04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10944750</td>\n",
       "      <td>13451</td>\n",
       "      <td>2</td>\n",
       "      <td>06/04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10944750</td>\n",
       "      <td>13451</td>\n",
       "      <td>2</td>\n",
       "      <td>06/04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10944750</td>\n",
       "      <td>13451</td>\n",
       "      <td>0</td>\n",
       "      <td>06/04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10944750</td>\n",
       "      <td>13451</td>\n",
       "      <td>0</td>\n",
       "      <td>06/04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       user  brand  behavr   date\n",
       "0  10944750  13451       0  06/04\n",
       "1  10944750  13451       2  06/04\n",
       "2  10944750  13451       2  06/04\n",
       "3  10944750  13451       0  06/04\n",
       "4  10944750  13451       0  06/04"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r1=pd.read_csv(r\"D:\\t_alibaba_data3.txt\",names=[\"user\",\"brand\",\"behavr\",\"date\"],sep=\"\\t\",dtype={\"behavr\":int})\n",
    "r1.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 求各个商品的浏览总量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "11     4\n",
       "15     6\n",
       "18     1\n",
       "19     2\n",
       "20     5\n",
       "22    33\n",
       "23     1\n",
       "26     9\n",
       "27     4\n",
       "29    21\n",
       "dtype: int64"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "11    4\n",
       "15    6\n",
       "18    1\n",
       "19    2\n",
       "20    5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 输出商品id最小的前10个商品的及其浏览量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "11     4\n",
       "15     6\n",
       "18     1\n",
       "19     2\n",
       "20     5\n",
       "22    33\n",
       "23     1\n",
       "26     9\n",
       "27     4\n",
       "29    21\n",
       "dtype: int64"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 输出商品浏览最大的前10个商品及其浏览量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "7868     3196\n",
       "11196    2076\n",
       "11080    1652\n",
       "12220    1650\n",
       "21110    1586\n",
       "2257     1387\n",
       "26722    1311\n",
       "3228     1242\n",
       "14261    1240\n",
       "14020    1139\n",
       "dtype: int64"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 输出用户10944750日期最后的20条浏览记录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## series与dataframe的运算\n",
    "\n",
    "* 数据框和序列的运算本质上是一种自动遍历，用来避免直接python循环的好方法\n",
    "\n",
    "* 运算函数：sub;add;div"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 与常数的运算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    1\n",
       "c    4\n",
       "d    5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a=pd.Series([2,1,4,5],index=[\"a\",\"b\",\"c\",\"d\"])\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    5\n",
       "b    4\n",
       "c    7\n",
       "d    8\n",
       "dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#和数值进行运算完全遵守numpy类似的规则：就是逐个作用到每一个元素\n",
    "a+3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a      7.389056\n",
       "b      2.718282\n",
       "c     54.598150\n",
       "d    148.413159\n",
       "dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.exp(a)#无论是内部函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    10\n",
       "b     7\n",
       "c    22\n",
       "d    31\n",
       "dtype: int64"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a**2+6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a   -5\n",
      "b   -6\n",
      "c   -3\n",
      "d   -2\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "def w1(x):#还是自定义函数\n",
    "    return x-9+2\n",
    "\n",
    "print(w1(a))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.265667</td>\n",
       "      <td>-1.037581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1.033648</td>\n",
       "      <td>0.443449</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.619189</td>\n",
       "      <td>1.741607</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b\n",
       "0 -0.265667 -1.037581\n",
       "1 -1.033648  0.443449\n",
       "2  0.619189  1.741607"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d=pd.DataFrame(np.random.randn(3,2),columns=[\"a\",\"b\"])\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-3.265667</td>\n",
       "      <td>-4.037581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-4.033648</td>\n",
       "      <td>-2.556551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-2.380811</td>\n",
       "      <td>-1.258393</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b\n",
       "0 -3.265667 -4.037581\n",
       "1 -4.033648 -2.556551\n",
       "2 -2.380811 -1.258393"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d-3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-3.265667</td>\n",
       "      <td>-4.037581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-4.033648</td>\n",
       "      <td>-2.556551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-2.380811</td>\n",
       "      <td>-1.258393</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b\n",
       "0 -3.265667 -4.037581\n",
       "1 -4.033648 -2.556551\n",
       "2 -2.380811 -1.258393"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d.sub(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.405256</td>\n",
       "      <td>3.363054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.375396</td>\n",
       "      <td>1.037718</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.253211</td>\n",
       "      <td>0.323050</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b\n",
       "0  1.405256  3.363054\n",
       "1  2.375396  1.037718\n",
       "2  4.253211  0.323050"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.exp(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b\n",
      "0 -6.659781 -5.787151\n",
      "1 -6.134836 -6.962976\n",
      "2 -5.552326 -8.129949\n"
     ]
    }
   ],
   "source": [
    "print(w1(d))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 课间题"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 求每天的总记录数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 求平均日记录数，并给出每天相对于平均记录数的差值,然后对差值求绝对值(求绝对值函数为abs())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.4"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "abs(-2.4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1486.8292682926829"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "date\n",
       "04/15     207.170732\n",
       "04/16      88.829268\n",
       "04/17     436.829268\n",
       "04/18    1041.170732\n",
       "04/19       3.170732\n",
       "04/20     253.829268\n",
       "04/21       4.170732\n",
       "04/22     399.829268\n",
       "04/23    1445.829268\n",
       "04/24     832.829268\n",
       "04/25     269.829268\n",
       "04/26      50.829268\n",
       "04/27     383.829268\n",
       "04/28     243.829268\n",
       "04/29     342.829268\n",
       "04/30     715.829268\n",
       "05/01     726.829268\n",
       "05/02     452.829268\n",
       "05/03     121.829268\n",
       "05/04     137.829268\n",
       "05/05     197.829268\n",
       "05/06     222.829268\n",
       "05/07     459.829268\n",
       "05/08     496.829268\n",
       "05/09      50.829268\n",
       "05/10       7.170732\n",
       "05/11     331.170732\n",
       "05/12    1422.829268\n",
       "05/13      65.829268\n",
       "05/14     439.829268\n",
       "            ...     \n",
       "07/17     267.829268\n",
       "07/18     133.170732\n",
       "07/19     238.170732\n",
       "07/20      87.170732\n",
       "07/21      26.170732\n",
       "07/22      90.829268\n",
       "07/23     231.829268\n",
       "07/24     271.829268\n",
       "07/25     148.170732\n",
       "07/26     602.170732\n",
       "07/27     649.170732\n",
       "07/28     785.170732\n",
       "07/29     133.170732\n",
       "07/30     196.170732\n",
       "07/31      68.170732\n",
       "08/01     206.170732\n",
       "08/02      92.170732\n",
       "08/03     210.170732\n",
       "08/04      17.170732\n",
       "08/05     311.170732\n",
       "08/06     213.829268\n",
       "08/07     276.829268\n",
       "08/08     357.170732\n",
       "08/09     155.170732\n",
       "08/10     340.170732\n",
       "08/11     407.170732\n",
       "08/12     349.170732\n",
       "08/13     120.170732\n",
       "08/14      91.829268\n",
       "08/15     872.170732\n",
       "Length: 123, dtype: float64"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 与序列以及数据框的运算（索引对齐）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    1\n",
       "c    4\n",
       "d    5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a=pd.Series([2,1,4,5],index=[\"a\",\"b\",\"c\",\"d\"])\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a     4\n",
       "b     2\n",
       "c     8\n",
       "d    10\n",
       "dtype: int64"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a+a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    3\n",
       "b    1\n",
       "c    4\n",
       "j    2\n",
       "p    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b=pd.Series([3,1,4,2,1],index=[\"a\",\"b\",\"c\",\"j\",\"p\"])\n",
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    5.0\n",
       "b    2.0\n",
       "c    8.0\n",
       "d    NaN\n",
       "j    NaN\n",
       "p    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#索引会取并集，找不到相同索引匹配进行运算的则为nan\n",
    "a+b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    5.0\n",
       "b    2.0\n",
       "c    8.0\n",
       "d    5.0\n",
       "j    2.0\n",
       "p    1.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#如果你希望缺失值用其他值来替代而不是空缺，则需要用函数来运算并加入fill_value参数\n",
    "a.add(b,fill_value=0)\n",
    "#上面没找到匹配值的时候则用0来替代\n",
    "#注意加法运算之后数据类型都变为了float"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b\n",
       "0  3  3\n",
       "1  8  4\n",
       "2  0  2"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d=pd.DataFrame(np.random.randint(0,10,size=(3,2)),columns=[\"a\",\"b\"])\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b  c\n",
       "0  5  7  8\n",
       "1  8  4  8\n",
       "2  8  3  5\n",
       "3  3  2  2"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c=pd.DataFrame(np.random.randint(0,10,size=(4,3)),columns=[\"a\",\"b\",\"c\"])\n",
    "c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.600000</td>\n",
       "      <td>0.428571</td>\n",
       "      <td>0.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.666667</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b     c\n",
       "0  0.600000  0.428571  0.25\n",
       "1  1.000000  1.000000  0.25\n",
       "2  0.000000  0.666667  0.40\n",
       "3  0.666667  1.000000  1.00"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d.div(c,fill_value=2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 课间题"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 求各个商品的购买量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 求各个商品的浏览量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 求各个商品的转化率（总购买比上总浏览）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "11       0.000000\n",
       "15       0.000000\n",
       "18       0.000000\n",
       "19       0.500000\n",
       "20       0.000000\n",
       "22       0.030303\n",
       "23       0.000000\n",
       "26       0.000000\n",
       "27       0.000000\n",
       "29       0.142857\n",
       "33       0.000000\n",
       "38       0.000000\n",
       "39       0.000000\n",
       "43       0.125000\n",
       "45       0.000000\n",
       "53       0.000000\n",
       "57       0.111111\n",
       "58       0.000000\n",
       "62       0.083333\n",
       "72       0.000000\n",
       "83       1.250000\n",
       "90       0.000000\n",
       "91       0.000000\n",
       "103      0.000000\n",
       "104      0.000000\n",
       "111      0.000000\n",
       "116      0.000000\n",
       "119      0.000000\n",
       "120      0.000000\n",
       "121      0.000000\n",
       "           ...   \n",
       "29463    0.000000\n",
       "29464    0.000000\n",
       "29465    0.063492\n",
       "29472    0.105263\n",
       "29473    0.020408\n",
       "29474    0.000000\n",
       "29475    0.000000\n",
       "29477    0.000000\n",
       "29486    0.000000\n",
       "29487    0.000000\n",
       "29488    0.000000\n",
       "29490    0.028169\n",
       "29491    0.000000\n",
       "29493    0.000000\n",
       "29497    0.000000\n",
       "29501    0.000000\n",
       "29502    0.000000\n",
       "29504    0.000000\n",
       "29505    0.142857\n",
       "29509    0.000000\n",
       "29524    0.000000\n",
       "29528    0.000000\n",
       "29529    0.000000\n",
       "29531    0.000000\n",
       "29538    0.000000\n",
       "29539    0.111111\n",
       "29541    0.000000\n",
       "29547    0.019608\n",
       "29551    0.111111\n",
       "29552    0.000000\n",
       "Length: 9526, dtype: float64"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 转化率最高的30个商品及其转化率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "16602         inf\n",
       "27653         inf\n",
       "28617         inf\n",
       "526           inf\n",
       "12018         inf\n",
       "25349         inf\n",
       "24819         inf\n",
       "15959         inf\n",
       "2291          inf\n",
       "24434         inf\n",
       "13017         inf\n",
       "18124         inf\n",
       "23342         inf\n",
       "3051          inf\n",
       "22306         inf\n",
       "14174    5.000000\n",
       "2208     5.000000\n",
       "10690    3.000000\n",
       "948      2.000000\n",
       "17977    2.000000\n",
       "6757     2.000000\n",
       "25333    1.500000\n",
       "2437     1.333333\n",
       "83       1.250000\n",
       "26168    1.222222\n",
       "10308    1.200000\n",
       "15828    1.000000\n",
       "18542    1.000000\n",
       "5478     1.000000\n",
       "1704     1.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 可否同时输出这30个商品转化率，购买量，浏览量等信息（需要迭代）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16602 inf 2 0\n",
      "27653 inf 1 0\n",
      "28617 inf 1 0\n",
      "526 inf 1 0\n",
      "12018 inf 4 0\n",
      "25349 inf 1 0\n",
      "24819 inf 1 0\n",
      "15959 inf 1 0\n",
      "2291 inf 1 0\n",
      "24434 inf 1 0\n",
      "13017 inf 1 0\n",
      "18124 inf 1 0\n",
      "23342 inf 1 0\n",
      "3051 inf 1 0\n",
      "22306 inf 1 0\n",
      "14174 5.0 5 1\n",
      "2208 5.0 5 1\n",
      "10690 3.0 3 1\n",
      "948 2.0 2 1\n",
      "17977 2.0 2 1\n",
      "6757 2.0 2 1\n",
      "25333 1.5 12 8\n",
      "2437 1.3333333333333333 4 3\n",
      "83 1.25 10 8\n",
      "26168 1.2222222222222223 11 9\n",
      "10308 1.2 6 5\n",
      "15828 1.0 2 2\n",
      "18542 1.0 1 1\n",
      "5478 1.0 1 1\n",
      "1704 1.0 1 1\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 求所有购买量大于20的各个商品的转化率(请自己百度去掉nan值的方法)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "554      0.117647\n",
       "905      0.579710\n",
       "911      0.043189\n",
       "2257     0.017304\n",
       "2367     0.105991\n",
       "2683     0.077758\n",
       "3228     0.030596\n",
       "3627     0.035573\n",
       "4949     0.022284\n",
       "7105     0.032061\n",
       "7868     0.038798\n",
       "8637     0.079027\n",
       "8689     0.043434\n",
       "11080    0.016949\n",
       "11196    0.032755\n",
       "12168    0.044481\n",
       "14020    0.036874\n",
       "14261    0.032258\n",
       "15019    0.090164\n",
       "15584    0.053476\n",
       "18024    0.093264\n",
       "18805    0.039792\n",
       "23779    0.111111\n",
       "26288    0.095890\n",
       "26722    0.020595\n",
       "27117    0.283784\n",
       "27791    0.073325\n",
       "29099    0.050955\n",
       "dtype: float64"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
